from typing import Iterable

import scrapy
from bs4 import BeautifulSoup
from scrapy import Request

from dumpDB.items import DumpfileItem


class TestSpider(scrapy.Spider):
    """Spider that extracts the title and keywords of the Sina news front page.

    Crawling starts from ``start_urls`` and is restricted to
    ``allowed_domains``. Each response produces a single ``DumpfileItem``.
    """

    name = "sinanews"
    allowed_domains = ["sina.com.cn"]
    start_urls = ('https://news.sina.com.cn/', )

    def parse(self, response) -> Iterable[DumpfileItem]:
        """Build one ``DumpfileItem`` from the page's title and keywords.

        Args:
            response: The downloaded page; its raw ``body`` is parsed with
                BeautifulSoup using the ``lxml`` parser.

        Yields:
            A ``DumpfileItem`` with:
              * ``name`` - text of the ``<title>`` element, or ``None`` when
                the page has no ``<title>``.
              * ``keyword`` - ``content`` attribute of the
                ``<meta name="keywords">`` element, or ``None`` when the
                element (or its ``content`` attribute) is missing.
        """
        # Pass the raw bytes so BeautifulSoup can detect the page encoding.
        soup = BeautifulSoup(response.body, 'lxml')

        # ``find`` returns None when nothing matches, so each lookup is
        # checked before it is dereferenced; otherwise a page without these
        # tags would raise AttributeError and produce no item at all.
        title_tag = soup.find('title')
        if title_tag is None:
            self.logger.warning("No <title> element found on %s", response.url)

        # ``attrs=`` is required here: ``name`` as a keyword argument of
        # ``find`` means the tag name, not the HTML ``name`` attribute.
        keywords_tag = soup.find('meta', attrs={'name': 'keywords'})
        if keywords_tag is None:
            self.logger.warning(
                "No <meta name=\"keywords\"> element found on %s", response.url
            )

        item = DumpfileItem()
        item['name'] = title_tag.text if title_tag is not None else None
        item['keyword'] = (
            keywords_tag.get('content') if keywords_tag is not None else None
        )

        self.logger.debug("Parsed item: %s", item)
        yield item